Web Scraping

# Download price data from Yahoo Finance with getSymbols().
# The focus is on 27 companies that were listed in the Dow for 2+ years.
companies <- c("MMM", "AXP", "AAPL", "BA", "CAT", "CVX",
               "CSCO", "KO", "XOM", "GS", "HD",
               "IBM", "INTC", "JNJ", "JPM", "MCD", "MRK",
               "MSFT", "NKE", "PG", "TRV", "UNH",
               "VZ", "V", "WMT", "WBA", "DIS")

# One NA per ticker. Nothing in the visible part of this script reads this
# vector; it is kept only in case later code refers to it.
companies.df.list <- rep(NA, length(companies))

# Fetch every ticker for 2019-07-01 .. 2020-06-30 and store the result in a
# variable named "data<TICKER>" (e.g. dataAAPL).
# auto.assign = FALSE makes getSymbols() return the data instead of creating
# a variable itself, so the variable name is controlled by assign() here.
for (i in seq_along(companies)) {
  assign(
    paste0("data", companies[i]),
    getSymbols(companies[i], auto.assign = FALSE,
               from = "2019-07-01", to = "2020-06-30")
  )
}
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
## 
## This message is shown once per session and may be disabled by setting 
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
# datasets are labeled as 'data[STOCK]' e.g. dataAAPL
# Collect them into one unnamed list, in the same order as `companies`.
# The variable names are derived from `companies` rather than typed out by
# hand, so the list cannot drift out of sync with the ticker vector.
# mget() stops with an error if any of the datasets is missing.
companies.df <- unname(mget(paste0("data", companies), inherits = TRUE))

Data Cleaning

# create a dataset with 27 stocks and 252 trading days
# 27 stocks (rows) and 252 returns (columns/features/predictors)
# The number of columns is taken from the first dataset in the list.
companies.closings <- matrix(data = NA, nrow = length(companies),
                             ncol = nrow(companies.df[[1]]))

for (i in seq_along(companies.df)) {
  # A series of a different length would either fail with an obscure message
  # or be silently recycled across the row, so check the length explicitly.
  stopifnot(nrow(companies.df[[i]]) == ncol(companies.closings))
  companies.closings[i, ] <- as.numeric(companies.df[[i]][, 4]) # closings are in the 4th column
}

# label each row of the stock-by-day matrix with its ticker
rownames(companies.closings) <- companies

# take the transpose
# each row is a trading day with 27 different stock prices
# each column is a stock
companies.closings.t <- t(companies.closings)

# running index of the trading days: 1, 2, ..., number of rows
day <- seq_len(nrow(companies.closings.t))

# data frame with the `day` column first, followed by one column per ticker
df <- as.data.frame(cbind(day, companies.closings.t))

# Install plotly only when it is missing. Calling install.packages()
# unconditionally on a package that is already loaded does nothing except
# emit "package 'plotly' is in use and will not be installed".
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(plotly)

# Line chart of the closing prices: the first trace is MMM, every further
# stock is added as its own trace below.
asset1 <- plot_ly(data = df, x = ~day, y = ~MMM, name = "MMM",
                  type = "scatter", mode = "lines",
                  line = list(color = "rgb(1, 1, 1)"))

# Add the remaining stocks (every ticker after the first).
# The column is looked up by ticker name: column 1 of `df` is `day`, so the
# positional index df[, i] is shifted by one relative to companies[i] and
# would pair each label with the previous stock's prices.
# The colour string is built with sprintf() so that `i` is actually
# substituted; a literal 'rgb(i, i, i)' is not a valid colour.
# NOTE(review): channel values 1..27 on the 0-255 scale are all close to
# black, so the lines will be hard to tell apart -- consider a real palette.
for (i in seq_along(companies)[-1]) {
  asset1 <- asset1 %>%
    add_trace(y = df[[companies[i]]], name = companies[i],
              line = list(color = sprintf("rgb(%d, %d, %d)", i, i, i)))
}

Asset 2: PCA Biplots

# half year cutoff
# Integer division keeps the row indices whole when the number of trading
# days is odd; the extra day then goes to the second half instead of being
# dropped.
half <- nrow(companies.closings.t) %/% 2

# PCA on each half of the trading days, with the stocks as variables.
# Every column is centred and scaled to unit variance. The argument is
# spelled `scale.` (with the dot), which is prcomp()'s actual parameter name.
pca_2019 <- prcomp(companies.closings.t[seq_len(half), ],
                   scale. = TRUE, center = TRUE)
pca_2020 <- prcomp(companies.closings.t[(half + 1):nrow(companies.closings.t), ],
                   scale. = TRUE, center = TRUE)

# Draw the two biplots stacked in one figure, then restore whatever layout
# was active before.
old_par <- par(mfrow = c(2, 1))
biplot(pca_2019)
biplot(pca_2020)

par(old_par)